This workflow explores correlations between NPS’s water balance model (NPS WBM), climate variables, stream flow, and groundwater levels in and around Bryce Canyon National Park (BRCA).

Downloading data

Park watersheds
# get park boundary
invisible(park_boundary <- suppressWarnings(getParkBoundary(park = "BRCA")))

# pull in the park watersheds as a starting AOI for pulling in WBM data:
invisible(park_watershed <- suppressWarnings(getWatersheds(aoi = park_boundary, save = FALSE)))# st_read('data/park/BRCA/park_watersheds.shp')
## [1] "BRCA watershed delineated!"
# get NHD HR flowlines within the boundary
invisible(park_flowlines <- suppressWarnings(mapNHDPlusHR(aoi = summarize(park_watershed)))) %>% summarize()
## Reading layer `NHDFlowline' from data source 
##   `/Users/kathrynwilli/Library/CloudStorage/OneDrive-Colostate/nps_water_vulnerability/data/nhdplushr/14/NHDPLUS_H_1407_HU4_GDB.gdb' 
##   using driver `OpenFileGDB'
## Simple feature collection with 47124 features and 18 fields
## Geometry type: MULTILINESTRING
## Dimension:     XYZM
## Bounding box:  xmin: -112.3257 ymin: 36.5022 xmax: -109.7637 ymax: 39.11994
## z_range:       zmin: 0 zmax: 0
## m_range:       mmin: 0 mmax: 100
## Geodetic CRS:  NAD83
## Reading layer `NHDFlowline' from data source 
##   `/Users/kathrynwilli/Library/CloudStorage/OneDrive-Colostate/nps_water_vulnerability/data/nhdplushr/16/NHDPLUS_H_1603_HU4_GDB.gdb' 
##   using driver `OpenFileGDB'
## Simple feature collection with 58566 features and 18 fields
## Geometry type: MULTILINESTRING
## Dimension:     XYZM
## Bounding box:  xmin: -114.2141 ymin: 37.40809 xmax: -111.3041 ymax: 40.00865
## z_range:       zmin: 0 zmax: 0
## m_range:       mmin: 0 mmax: 100
## Geodetic CRS:  NAD83
## Simple feature collection with 1 feature and 0 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -112.2799 ymin: 37.43605 xmax: -112.0824 ymax: 37.71128
## Geodetic CRS:  NAD83
##                            Shape
## 1 MULTILINESTRING ((-112.1507...
Water supply watersheds

NPS water supply locations may not necessarily be within the park, and therefore their watersheds may not be represented in the park boundary’s watersheds delineated above. So, here I am pulling in the water supply locations of interest, and delineating their own watersheds to ensure we capture them for pulling in more data. Water supply locations are found using the Utah Division of Water Rights database.

# get Utah points of diversion near BRCA
invisible(POD_Utah <- getPODUtah(aoi = park_watershed, dist = 0.1))
## Reading layer `PointsOfDiversion' from data source 
##   `/private/var/folders/xw/gnpc7n755r58vw8zrxvy_bh40000gn/T/RtmpJGZ5Pm/file1631d1255c395' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 388380 features and 15 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 228787.9 ymin: 427245.7 xmax: 673308.6 ymax: 4663459
## Projected CRS: NAD83 / UTM zone 12N
# filter to water supplies of interest
POD_park <- POD_Utah %>%
  dplyr::filter(WRNUM %in% c("61-893", "2061001M00")) %>% #, # current supply
  #"61-1143")) %>% # potential future supply
  distinct(LOCATION, .keep_all = TRUE) 

watersupply_watershed <- vector("list", nrow(POD_park))

# get the watersheds of different points:
invisible(for(i in 1:nrow(POD_park)){
  
  watersupply_watershed[[i]] <- POD_park[i,] %>% 
    getXYWatersheds(sf = ., coordinates = NULL)
  
})

watersupply_watershed <- watersupply_watershed %>%
  bind_rows() %>% 
  distinct(featureid, .keep_all = TRUE) %>%
  #... hard to tell from topo what the future supply's real watershed looks like... 
  # But! This future supply is a deep (100 ft) well, so it may not be as important.
  # I'll need to look into this further at some point.
  filter(!featureid %in% c("10807991", "10807997"))

invisible(watersupply_flowlines <- mapNHDPlusHR(aoi = summarize(watersupply_watershed)))
## Reading layer `NHDFlowline' from data source 
##   `/Users/kathrynwilli/Library/CloudStorage/OneDrive-Colostate/nps_water_vulnerability/data/nhdplushr/14/NHDPLUS_H_1407_HU4_GDB.gdb' 
##   using driver `OpenFileGDB'
## Simple feature collection with 47124 features and 18 fields
## Geometry type: MULTILINESTRING
## Dimension:     XYZM
## Bounding box:  xmin: -112.3257 ymin: 36.5022 xmax: -109.7637 ymax: 39.11994
## z_range:       zmin: 0 zmax: 0
## m_range:       mmin: 0 mmax: 100
## Geodetic CRS:  NAD83
## Reading layer `NHDFlowline' from data source 
##   `/Users/kathrynwilli/Library/CloudStorage/OneDrive-Colostate/nps_water_vulnerability/data/nhdplushr/16/NHDPLUS_H_1603_HU4_GDB.gdb' 
##   using driver `OpenFileGDB'
## Simple feature collection with 58566 features and 18 fields
## Geometry type: MULTILINESTRING
## Dimension:     XYZM
## Bounding box:  xmin: -114.2141 ymin: 37.40809 xmax: -111.3041 ymax: 40.00865
## z_range:       zmin: 0 zmax: 0
## m_range:       mmin: 0 mmax: 100
## Geodetic CRS:  NAD83
USGS stream gage watersheds

For many parks, the nearest USGS stream gage is far away. Therefore, we are pulling all NWIS gages within 100 km of the park boundary. Of those, we only select stream gages that are considered “reference” gages in the GAGES-II database (Falcone, 2011). Then, we delineate each of those gages’ watersheds using the get_nldi_basin() function from the {nhdplusTools} package:

# area around the park ~ 100 km away:
aoi <- sf::st_buffer(park_boundary, dist = 0.3)
# locate NWIS daily flow locations in that radius:
nwis <- listNWIS(aoi = aoi, dist = 0) %>%
  # daily values...
  filter(data_type_cd == "dv",
         # ... of flow
         code == "00060") %>%
  # for comparison purposes, need data from 1980 onwards
  filter(year(end_date) >= 1980)

# find the NWIS gages that are (crudely, probably) representative of natural conditions:
ref_gages <- get_gagesII(id = nwis$site_no) %>%
  filter(class == "Ref")

nwis <- nwis %>%
  filter(site_no %in% ref_gages$staid) %>%
  left_join(st_drop_geometry(ref_gages), by = c("site_no" ="staid"))

# for(i in 1:nrow(nwis)){
#   nwis$comid[i] <- discover_nhdplus_id(nwis[i,])
# }

# pull those sites' flow data
invisible(getNWIS(inventory = nwis, park = "misc", path = "data/"))
## [1] "10173450:Discharge, cubic feet per second for misc done!"
## [1] "10183900:Discharge, cubic feet per second for misc done!"
# where are they?
# mapview(nwis) + mapview(park_boundary)

nldi_finder <- function(site_no){
  # Now, get those gages' watersheds using get_nldi_basin in the {nhdplusTools}.
  # Input for NLDI requires "USGS-" before the gage number
  nldi_nwis <- list(featureSource = "nwissite",
                    featureID = paste0("USGS-", site_no))
  
  invisible(gage_basin <- nhdplusTools::get_nldi_basin(nldi_feature = nldi_nwis) %>%
    st_transform(., 4269) %>%
    mutate(site_no = site_no))
  
  return(gage_basin)
  
}

nldi_meta <- function(site_no){
  # Now, get those gages' watersheds using get_nldi_basin in the {nhdplusTools}.
  # Input for NLDI requires "USGS-" before the gage number
  nldi_nwis <- list(featureSource = "nwissite",
                    featureID = paste0("USGS-", site_no))
  
  invisible(gage_basin <- nhdplusTools::get_nldi_characteristics(nldi_feature = nldi_nwis, type = "total")[[1]] %>%
    filter(characteristic_id %in% c("TOT_ELEV_MEAN", "TOT_ELEV_MAX", "TOT_ELEV_MIN")) %>%
    pivot_wider(-percent_nodata, values_from = characteristic_value, names_from = characteristic_id))
  
  return(gage_basin)
  
}

nldi_watershed <- nwis$site_no %>%
  map_dfr(~nldi_finder(site_no = .)) %>%
  mutate(data = map(site_no, ~nldi_meta(site_no = .))) %>%
  unnest() %>%
  left_join(st_drop_geometry(nwis), by = "site_no")

invisible(nldi_flowlines <- mapNHDPlusHR(aoi = summarize(nldi_watershed)) %>% summarize())
## Reading layer `NHDFlowline' from data source 
##   `/Users/kathrynwilli/Library/CloudStorage/OneDrive-Colostate/nps_water_vulnerability/data/nhdplushr/15/NHDPLUS_H_1501_HU4_GDB.gdb' 
##   using driver `OpenFileGDB'
## Simple feature collection with 169544 features and 18 fields
## Geometry type: MULTILINESTRING
## Dimension:     XYZM
## Bounding box:  xmin: -115.7025 ymin: 35.11076 xmax: -111.5117 ymax: 39.29939
## z_range:       zmin: 0 zmax: 0
## m_range:       mmin: 0 mmax: 100
## Geodetic CRS:  NAD83
## Reading layer `NHDFlowline' from data source 
##   `/Users/kathrynwilli/Library/CloudStorage/OneDrive-Colostate/nps_water_vulnerability/data/nhdplushr/14/NHDPLUS_H_1407_HU4_GDB.gdb' 
##   using driver `OpenFileGDB'
## Simple feature collection with 47124 features and 18 fields
## Geometry type: MULTILINESTRING
## Dimension:     XYZM
## Bounding box:  xmin: -112.3257 ymin: 36.5022 xmax: -109.7637 ymax: 39.11994
## z_range:       zmin: 0 zmax: 0
## m_range:       mmin: 0 mmax: 100
## Geodetic CRS:  NAD83
## Reading layer `NHDFlowline' from data source 
##   `/Users/kathrynwilli/Library/CloudStorage/OneDrive-Colostate/nps_water_vulnerability/data/nhdplushr/16/NHDPLUS_H_1603_HU4_GDB.gdb' 
##   using driver `OpenFileGDB'
## Simple feature collection with 58566 features and 18 fields
## Geometry type: MULTILINESTRING
## Dimension:     XYZM
## Bounding box:  xmin: -114.2141 ymin: 37.40809 xmax: -111.3041 ymax: 40.00865
## z_range:       zmin: 0 zmax: 0
## m_range:       mmin: 0 mmax: 100
## Geodetic CRS:  NAD83

Lastly, we dissolve all of these watersheds into a single shapefile that we can use to download all future data.

# our AOI blob!
final_aoi <- park_watershed %>%
  bind_rows(watersupply_watershed) %>%
  bind_rows(nldi_watershed) %>%
  summarize()

Pulling in explanatory variables to explain water supply levels

KW REMINDER Running list of things I’d like to explore:

  • GridMet (temp, precip, P-PET?, other wbm vars)… with a lag?
  • DayMet (temp, precip, P-PET?, other wbm vars)… with a lag?
  • StreamCat variables?
  • Visitation and/or water use?
  • Nearby weather station data
  • Nearby stream data

Here I pull in NPS’s gridded monthly water balance predictions (4km resolution). This data comes from Which Water Balance Model Do You Need? -> Historical THREDDS. The function below downloads and crops these gridded monthly water balance predictions (from 1980-2022) for our area of interest. This includes the WBM variables soil water content, runoff, rain, accumulated snow water equivalent, PET, deficit, and AET. It then converts the information into a table. The coordinates of the grid centroids are preserved. For all variables across all years, this takes a little over an hour (for daily GridMET).

options(timeout = 900)
getHistoricWBMGridMET(park = "misc", aoi = summarize(watersupply_watershed), path = "data/", wb_vars = c("soil_water", "runoff", "rain", "accumswe", "PET", "Deficit", "AET"))

# getHistoricWBMDayMET(park = "misc", aoi = final_aoi, path = "data/", wb_vars = c("soil_water", "runoff", "rain", "agdd", "accumswe", "PET", "Deficit", "AET"))

We can also download and explore additional gridded climate characteristics from GridMET, including temperature, precipitation, relative humidity, PET, wind speed, and vapor pressure deficit:

# 4km resolution
invisible(gridmet_vars <- get_gridMET(path = "data/", 
                            park = "misc", 
                            aoi = final_aoi, 
                            vars = c("tmmx", "tmmn", "pr", "rmax", "rmin", "pet", "etr", "vpd", "vs"), 
                            start = "1979-01-01", 
                            end = "2022-12-31"))

raster_puller <- function(filename, point, aoi){
  
  
  data <-  data.table::fread(filename) 
  
  try(data <- data %>%
        mutate(x = lon,
               y = lat))
  
  if(is.null(aoi)){
    
   invisible(tab_data <- data %>%   
      st_as_sf(coords = c("x","y"), crs = st_crs(point)) %>%
      as_tibble())
    
    ID_raster <- data %>%
      group_by(x, y) %>%
      summarize() %>%
      rowid_to_column() %>%
      st_as_sf(coords = c("x","y"), crs = st_crs(point))
    
    final <- point %>%
      mutate(rowid = sf::st_nearest_feature(., ID_raster)) %>%
      inner_join(., as_tibble(ID_raster) , by = "rowid") %>%
      inner_join(tab_data, by = c("geometry.y" = "geometry"))
    
  }
  
  if(is.null(point)){
    
    invisible(final <-  data %>%
      st_as_sf(coords = c("x","y"), crs = st_crs(aoi)) %>%
      .[aoi,])
    
  }
  
  return(final)
}

invisible(gridmet_data <- list.files('data/misc/gridmet_hist/', full.names = TRUE) %>%
  map(~raster_puller(filename = ., aoi = NULL, point = POD_park[1,])) %>%
  bind_rows() %>%
  mutate(value = case_when(variable %in% c("tmmx", "rmax") ~ max,
                           variable %in% c("tmmn", "rmin") ~ min,
                           variable %in% c("vpd", "vs") ~ mean,
                           variable %in% c("pet", "etr", "pr") ~ sum)) %>%
  st_drop_geometry() %>%
  dplyr::select(ym, variable, value) %>%
  mutate(ym = ym(substr(ym, 1, 7))) %>%
  distinct(.keep_all = TRUE) %>%
  pivot_wider(names_from = variable, values_from = value))
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
colnames(gridmet_data) <- paste0(colnames(gridmet_data), ".gridmet")

invisible(gridmet_ws <- list.files('data/misc/gridmet_hist/', full.names = TRUE) %>%
  map(~raster_puller(filename = ., point = NULL, aoi = summarize(watersupply_watershed))) %>%
  bind_rows() %>%
  mutate(value = case_when(variable %in% c("tmmx", "rmax") ~ max,
                           variable %in% c("tmmn", "rmin") ~ min,
                           variable %in% c("vpd", "vs") ~ mean,
                           variable %in% c("pet", "etr", "pr") ~ sum)) %>%
  as_tibble() %>%
  dplyr::select(ym, variable, value, geometry) %>%
  mutate(ym = ym(substr(ym, 1, 7))) %>%
  distinct(.keep_all = TRUE) %>%
  pivot_wider(names_from = variable, values_from = value) %>%
  group_by(ym) %>%
  summarize_all(mean, na.rm = TRUE) %>%
  select(-geometry))
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
## Error in mutate(., x = lon, y = lat) : ℹ In argument: `x = lon`.
## Caused by error:
## ! object 'lon' not found
colnames(gridmet_ws) <- paste0(colnames(gridmet_ws), ".gridmet.ws")

# 1km resolution
gridmet_wbm <- list.files('data/misc/wbm_gridmet_hist/', full.names = TRUE) %>%
  map(~raster_puller(filename = ., point = POD_park[1], aoi = NULL)) %>%
  bind_rows() %>%
  mutate(DT = ymd_hms(time),
         ym = ym(substr(DT, 1, 7))) %>%
  select(val, var, ym) %>%
  st_drop_geometry() %>%
  distinct(.keep_all = TRUE) %>%
  pivot_wider(names_from = "var", values_from = "val")
colnames(gridmet_wbm) <- paste0(colnames(gridmet_wbm), ".wbm")

gridmet_wbm_ws <- list.files('data/misc/wbm_gridmet_hist/', full.names = TRUE) %>%
  map(~raster_puller(filename = ., point = NULL, aoi = summarize(watersupply_watershed))) %>%
  bind_rows() %>%
  mutate(DT = ymd_hms(time),
         ym = ym(substr(DT, 1, 7))) %>%
  select(val, var, ym) %>%
  as_tibble() %>%
  distinct(.keep_all = TRUE) %>%
  pivot_wider(names_from = "var", values_from = "val") %>%
  group_by(ym) %>%
  summarize_all(mean, na.rm = TRUE) %>%
  select(-geometry)
colnames(gridmet_wbm_ws) <- paste0(colnames(gridmet_wbm_ws), ".wbm.ws")


# How does precipitation compare to streamflow and well data?
# Find NOAA weather stations near our areas of interest
invisible(noaa_data <- getNOAA(aoi = st_buffer(final_aoi, 0.1), park = "misc")) %>%
  #              prcp = Precipitation (tenths of mm)
  #              snow = Snowfall (mm)
  #                snwd = Snow depth (mm)
  #              tmax = Maximum temperature (tenths of degrees C)
  #              tmin = Minimum temperature (tenths of degrees C)
  #              elevation = meters
  select(name, id, date, tmax, tmin, prcp, snow, snwd, elevation, geometry) %>%
  # Raw NOAA data is in "tenths of" units for temp and precip:
  mutate_at(c("tmax", "tmin", "prcp"), function(x) {x * 0.10})
## [1] "NOAA data for specified dates saved to respective NPS park folder in: /Users/kathrynwilli/Desktop/0_my_git/nps_water_vulnerability/"
## Simple feature collection with 319536 features and 9 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -112.92 ymin: 37.4403 xmax: -112.0811 ymax: 37.75
## Geodetic CRS:  NAD83
## # A tibble: 319,536 × 10
##    name  id          date        tmax  tmin  prcp  snow  snwd elevation
##  * <chr> <chr>       <date>     <dbl> <dbl> <dbl> <dbl> <dbl>     <dbl>
##  1 ALTON USC00420086 1915-05-01  NA    -6.7  30.5    NA    NA     2164.
##  2 ALTON USC00420086 1915-05-02   1.1  -7.8  15.7    NA    NA     2164.
##  3 ALTON USC00420086 1915-05-03   6.7 -15    NA      NA    NA     2164.
##  4 ALTON USC00420086 1915-05-04   8.9  -6.7  NA      NA    NA     2164.
##  5 ALTON USC00420086 1915-05-05   6.7  -4.4   2      NA    NA     2164.
##  6 ALTON USC00420086 1915-05-06   8.9  -2.2  NA      NA    NA     2164.
##  7 ALTON USC00420086 1915-05-07  14.4  -3.3   1.8    NA    NA     2164.
##  8 ALTON USC00420086 1915-05-08  15.6  -1.1  NA      NA    NA     2164.
##  9 ALTON USC00420086 1915-05-09  14.4   0    NA      NA    NA     2164.
## 10 ALTON USC00420086 1915-05-10  18.3  -1.1  NA      NA    NA     2164.
## # ℹ 319,526 more rows
## # ℹ 1 more variable: geometry <POINT [°]>
noaa_data_sub <- noaa_data %>%
  data.table::data.table() %>%
  mutate(ym = ym(substr(date, 1, 7))) %>%
  filter(year(ym) >= 1999 & year(ym) <= 2022) %>%
  group_by(name, id, ym, geometry) %>%
  summarize(tmax = mean(tmax, na.rm = TRUE),
            tmin = mean(tmin, na.rm = TRUE),
            tmean = ((tmax+tmin)/2),
            prcp = sum(prcp, na.rm = TRUE),
            snow = sum(snow, na.rm = TRUE),
            snwd = max(snwd, na.rm = TRUE)) %>%
  st_as_sf()

tab_data <- noaa_data_sub %>%
  as_tibble()

ID_noaa <- noaa_data_sub %>%
  group_by(name, id) %>%
  summarize() %>%
  rowid_to_column() 


# Find the nearest NOAA station to the wells:
nearby_noaa <- ID_noaa %>%
  filter(rowid == st_nearest_feature(POD_park, ID_noaa)[1]) %>%
  st_drop_geometry() %>%
  inner_join(., tab_data, by = "name") %>% 
  dplyr::select(ym, tmax, tmin, tmean, prcp, snow, snwd)

colnames(nearby_noaa) <- paste0(colnames(nearby_noaa), ".noaa")

East Creek well data

BRCA has been measuring static water levels at their East Creek water supply wells since at least 2000. These wells are the current source for the park’s public water supply. Well 1 is 90 feet deep with a suspected perforated casing from 50-60 feet below ground surface, and is the furthest well “upstream”. Well 2 is 522 feet deep with a suspected 10-foot casing at the bottom of the well, and is about 400 feet directly down the valley from Well 1. Meanwhile, there is a third well that is likely similary constructed as Well 2 and located next to Well 2. Well 3 has no pump and has likely never been in use.

A report written in 1998 about the wellfield described the system as being an alluvial aquifer recharged by infiltration of rainflow and streamflow into the East Creek drainage basin. Discharge of bedrock springs, such as Whiteman Spring, provide an additional source of recharge. The report also purported that the aquifer stores approximately 2.6 million cubic feet of water. A pump test performed in 1997 was also described, and culminated in the following results:

- Hydraulic conductivity ratio - 1:10 (ratio of vertical to horizontal hydraulic conductivity)

- Transmissivity (T) = 8900 ft^2/day

- Specific Yield (Sy) = 0.37

- Kr = 150 feet/day

- Aquifer thickness = 60 feet

- Hydraulic gradient between well 1 and 2 = 0.0057 ft/ft

- Hydraulic gradient upstream of well field = 0.0042 ft/ft

- Average daily use = 112,000 gallons/day (or, 15,000 cubic feet/day) … during summer pumpage… so greatly overestimates winter use.

well_data <- read_csv('data/park/BRCA/manual/BRCA_Well_Data.csv') 
names(well_data) <- make.names(names(well_data))
well_monthly <- well_data %>%
  dplyr::mutate(static_in = as.numeric(str_replace(Static..in., "-", "")),
                date = mdy(Date),
                ym = ym(substr(date, 1, 7))) %>%
  dplyr::select(date,
                ym,
                well = Well,
                static_in) %>%
  dplyr::group_by(well, ym) %>%
  dplyr::summarize(mean_static = mean(static_in, na.rm = TRUE))
well_daily <- well_data %>%
  dplyr::mutate(static_in = as.numeric(str_replace(Static..in., "-", "")),
                date = mdy(Date),
                ym = ym(substr(date, 1, 7))) %>%
  dplyr::select(date,
                ym,
                well = Well,
                static_in)

# The state of Utah also tracks monthly water use of that system:

invisible(water_supply <- getWaterSuppliersUtah(aoi = park_boundary) %>%
  filter(grepl("National Park", WRNAME, ignore.case=TRUE)) %>%
  .$WRID)
## Reading layer `MnI_current' from data source 
##   `/private/var/folders/xw/gnpc7n755r58vw8zrxvy_bh40000gn/T/RtmpJGZ5Pm/file1631d7a716225' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1349 features and 27 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 242809.9 ymin: 4089230 xmax: 670876.3 ymax: 4725609
## Projected CRS: NAD83 / UTM zone 12N
water_use_1 <- getWaterUseUtah(WRID = water_supply)[[1]] %>%
  slice(1:39) %>%
  pivot_longer(-c("Year", "Method of Measurement"), names_to = "month", values_to = "use_acre_feet") %>%
  mutate(ym = ym(paste0(Year, "-", month))) %>%
  filter(month != "Annual inAcre Feet") %>%
  select(ym, use_acre_feet) %>%
  mutate(well = "Well 1")

water_use_2 <- getWaterUseUtah(WRID = water_supply)[[1]] %>%
  slice(51:nrow(.)) %>%
  dplyr::filter(!is.na(as.numeric(Year))) %>%
  pivot_longer(-c("Year", "Method of Measurement"), names_to = "month", values_to = "use_acre_feet") %>%
  mutate(ym = ym(paste0(Year, "-", month))) %>%
  filter(month != "Annual inAcre Feet") %>%
  select(ym, use_acre_feet) %>%
  mutate(well = "Well 2")

both_wells <- water_use_1 %>%
  bind_rows(water_use_2)

# Here we combine the average monthly static water levels with the monthly water use:

well_munge <- well_monthly %>%
  left_join(both_wells, by = c("ym", "well"))

all_nwis_flow <- list.files("data/misc/nwis/dv/", full.names = TRUE) %>%
  map_dfr(~ read_csv(.) %>% 
            data.table::data.table() %>% 
            mutate(across(everything(), as.character))) %>%
  bind_rows() %>%
  mutate(dateTime = lubridate::ymd(dateTime),
         ym = ym(substr(dateTime, 1, 7)),
         flow_cfs = as.numeric(X_00060_00003)) %>%
  left_join(st_drop_geometry(nldi_watershed), by = "site_no") %>%
  mutate(flow_mmd = (flow_cfs * 28.3168) / (drain_sqkm * 1e6 * 1e6)) %>%
  group_by(site_no, ym) %>%
  summarize(mean_mmd = mean(flow_mmd, na.rm = TRUE)) %>%
  pivot_wider(names_from = "site_no", values_from = "mean_mmd") %>%
  select(1, mammoth_ck = 2, usgs2 = 3)

# NPS tracks monthly total park visitors. Here we pull that information in for the park:
all_brca_data <- getUnitVisitation(units = "BRCA", startYear = 2000, endYear = 2023) %>%
  mutate(ym = ym(paste0(Year, "-", Month))) %>%
  select(ym, RecreationVisitors) %>%
  right_join(., well_munge, by = "ym") %>%
  inner_join(., gridmet_data, by = c("ym"="ym.gridmet")) %>%
  dplyr::select(well, ym, mean_static, 
                rec_visitation = RecreationVisitors, 
                use_acre_feet, 
                etr.gridmet, pet.gridmet, pr.gridmet, rmax.gridmet, rmin.gridmet, tmmn.gridmet, tmmx.gridmet, vpd.gridmet, vs.gridmet) %>%
  distinct(.keep_all = TRUE) %>%
  inner_join(gridmet_ws, by = c("ym" = "ym.gridmet.ws")) %>%
  inner_join(all_nwis_flow, by = c("ym"="ym")) %>%
  inner_join(gridmet_wbm, by = c("ym" = "ym.wbm")) %>%
  inner_join(gridmet_wbm_ws, by = c("ym" = "ym.wbm.ws")) %>%
  inner_join(nearby_noaa, by = c("ym"="ym.noaa")) %>%
  # new variables:
  mutate(mean_static = 25.4 * (mean_static),
         tmean.gridmet = (tmmn.gridmet + tmmx.gridmet)/2,
         p.pet.gridmet = pr.gridmet - pet.gridmet,
         p.pet.wbm = pr.gridmet - pet.wbm,
         p.t.gridmet = pr.gridmet * tmean.gridmet,
         runoff.soilwater.snow.wbm = runoff.wbm + soil_water.wbm + accumswe.wbm,
         season = case_when(month(ym) %in% c(12,1,2) ~ "Winter",
                            month(ym) %in% c(3,4,5) ~ "Spring",
                            month(ym) %in% c(6,7,8) ~ "Summer",
                            month(ym) %in% c(9,10,11) ~ "Fall")) %>%
  # well data is super patchy until 2005
  filter(year(ym) > 2003)

Figures

Here I’m exploring the data. Reminder, all climate variables are taken from the 4-km grid that the water supply coordinates are contained within; as well as means across the water supply’s drainage basin (i.e., East Creek’s watershed, denoted with “.ws” tacked onto the variable name). Meanwhile, the WBM outputs are 1km.

Monthly well data at BRCA:

Static level is the depth to the water table from the ground surface when the pump is not running. Meaning, the bigger the value, the lower the water levels in the well casing (and less water). This is a proxy for the natural water levels of the aquifer.

well_data <- ggplot(data = all_brca_data) +
  geom_line(aes(x = ym, y = mean_static, color = well)) +
  # geom_col(aes( x = ym, y = pr))
  #facet_wrap(~well, nrow = 2) +
  theme_bw() +
  xlab("Date") +
  ylab("Static Water Level (mm)") +
  scale_color_manual(values = c("#E69F00", "darkblue")) +  # Set line colors
  theme(legend.position = "none")  # Move legend to the bottom

streamflow <- ggplot(data = all_brca_data) +
  geom_line(aes(x = ym, y = mammoth_ck)) +
  theme_bw() +
  xlab("Date") +
  ylab("Flow (mmd)")

precip <- ggplot(data = all_brca_data) +
  geom_col(aes( x = ym, y = pr.gridmet)) +
  theme_bw() +
  xlab("Date") +
  ylab("Precipitation (mm?)")

ggarrange(well_data, streamflow, precip, nrow = 3)

Well data coupled with water use and park visitation:

level <- ggplot() +
  geom_line(data = all_brca_data, 
            aes(x = ym, y = mean_static, color = well)) +
  # geom_line(data = filter(all_brca_data, !is.na(use_acre_feet)), 
  #           aes(x = ym, y = as.numeric(use_acre_feet)*100),
  #           color = "grey") +
  # facet_wrap(~well, nrow = 2) +
  theme_bw() +
  xlab("Date") +
  theme(legend.position = "none") +
  ylab("Static Well Level (mm)")
# scale_y_continuous(name = "Static water level (mm)",
#                    sec.axis = sec_axis(~.*100,
#                                        name = "Water use (acre-feet)",
#                                        breaks = seq(0, 1500, 10))) +
# theme(axis.title.y.right = element_text(color="grey"),
#       axis.title.y.left = element_text(color="black"))

use <- ggplot() +
  geom_line(data = all_brca_data,
            aes(x = ym, y = as.numeric(use_acre_feet), color = well)) +
  theme_bw() +
  xlab("Date") +
  ylab("Use (acre-ft)") +
  theme(legend.position = "none") 

visitors <- ggplot() +
  geom_line(data = filter(all_brca_data, !is.na(rec_visitation) & well == "Well 1"), 
            aes(x = ym, y = as.numeric(rec_visitation)),
            color = "blue") +
  theme_bw() +
  xlab("Date") +
  ylab("Visitors")

ggarrange(level, use, visitors, nrow = 3)

Comparing nearby NWIS data at Mommth Creek and well data (both are logged)

ggplot(data = all_brca_data) +
  geom_point(aes(x = log10(mean_static), y = log10(mammoth_ck), color = season)) +
  xlab("log(USGS Streamflow)") +
  ylab("log(Well Depth)") +
  theme_bw() +
  scale_color_manual(values = c("#56B4E9", "#F0E442","#D55E00", "maroon"))

WBM PET (Oudin) vs. GridMET PET (Reference grass ET - Penman Monteith?)

ggplot(data = all_brca_data) +
  geom_point(aes(x = (pet.gridmet), y = (pet.wbm), color = season)) +
  xlab("GridMET - Ref. Grass, Penman Monteith?") +
  ylab("NPS WBM - Oudin?") +
  theme_bw() +
  scale_color_manual(values = c("#56B4E9", "#F0E442","#D55E00", "maroon"))

PET is in the same units so why are the WBM values so big?? WBM is blue. GridMET is maroon.

ggplot(data = all_brca_data) +
  geom_line(aes(x = (ym), y = (pet.gridmet)), color = "#F0E442") +
  geom_line(aes(x = (ym), y = (pet.wbm)), color = "#D55E00") +
  geom_line(aes(x = (ym), y = (pet.gridmet.ws)), color = "maroon") +
  geom_line(aes(x = (ym), y = (pet.wbm.ws)), color = "#56B4E9") +
  xlab("Month") +
  ylab("PET, a few different ways") +
  theme_bw() +
  scale_color_manual(values = c("#56B4E9", "#F0E442","#D55E00", "maroon"))

Accumulated Snow Water Equivalent (SWE, mm): total SWE at each location. Estimated using equations described by Tercek and Rodman (2016) with melt threshold temperatures at each lkm grid cell provided by Jennings et al. (2018). Maroon points are the watershed avg, as opposed to the point the water supply is contained within.

ggplot(data = all_brca_data) +
  geom_line(aes(x = (ym), y = (accumswe.wbm.ws)), color = "#56B4E9") +
  geom_point(aes(x = (ym), y = (accumswe.wbm)), color = "maroon") +
  xlab("Month") +
  ylab("SWE, mm") +
  theme_bw() 

Explore relationship between groundwater levels and precip at a seasonal or yearly interval. Does peak SWE have a better relationship to level than precip alone?

all_brca_data_annual <- all_brca_data %>%
  mutate(wyear = ifelse(month(ym) >= 10, year(ym) + 1, year(ym))) %>%
  group_by(wyear, well) %>%
  summarize(peak_swe = max(accumswe.wbm, na.rm = TRUE),
            mean_static = mean(mean_static, na.rm = TRUE),
            mean_visitation = sum(rec_visitation, na.rm = TRUE),
            mean_use = mean(as.numeric(use_acre_feet), na.rm = TRUE),
            total_precip = sum(pr.gridmet, na.rm = TRUE))


a <- ggplot(data = all_brca_data_annual) +
  geom_line(aes(x = wyear, y = mean_static, color = well)) +
  theme_bw() +
  xlab("Water Year") +
  ylab("Static Water Level (mm)") +
  theme(legend.position = "none") 

b <- ggplot(data = all_brca_data_annual) +
  geom_line(aes(x = wyear, y = peak_swe)) +
  theme_bw() +
  xlab("Date") +
  ylab("Peak SWE (mm)") +
  theme(legend.position = "none") 

c <- ggplot(data = all_brca_data_annual) +
  geom_line(aes(x = wyear, y = total_precip)) +
  theme_bw() +
  xlab("Date") +
  ylab("Total Precip (mm)") +
  theme(legend.position = "none") 

d <- ggplot(data = all_brca_data_annual) +
  geom_line(aes(x = wyear, y = mean_visitation)) +
  theme_bw() +
  xlab("Date") +
  ylab("Total Vistors") +
  theme(legend.position = "none") 

e <- ggplot(data = all_brca_data_annual) +
  geom_line(aes(x = wyear, y = mean_use, color = well)) +
  theme_bw() +
  xlab("Date") +
  ylab("Use (acre-feet)") +
  theme(legend.position = "none") 

ggarrange(a,b,c,d,e, nrow = 5, align = "v")

Cross-correlation

Interpretation: a positive lag indicates that the well levels shift in response to the other variable. One lag unit = one month.

site <- unique(all_brca_data$well)

sub_brca_data<- filter(all_brca_data, well == site[1])

ccf_munger <- function(column){
  
  ts_vector <- sub_brca_data %>%
    select(column) %>%
    pull()
  
  ts <- zoo::na.approx(ts(as.numeric(ts_vector), start = 1))
  
  well.depth <- zoo::na.approx(ts(sub_brca_data$mean_static, start = 1))
  
  result <- ccf(ts, well.depth, na.action = na.pass, type = "correlation", plot = FALSE)
  
  plot(result, main = paste0("Correlation of well data with ", column), xlab = "Lag", ylab = paste0("Correlation"))
  
  ccf_df <- tibble(lag_months = c(result$lag), 
                   correlation = c(result$acf)) %>%
    mutate(variable = column)
  
  return(ccf_df)
  
}

ccfs <- names(sub_brca_data)[c(4:24, 26:39, 41:43, 47:51)] %>%
  map(~ ccf_munger(column = .)) %>%
  bind_rows() 

ccf <- ccfs %>%
  group_by(variable) %>%
  # Vars impact well, never the other way around
  filter((lag_months) >= 0 & lag_months <= 12) %>%
  # what are the highest-correlated lags?
  filter(abs(correlation) == max(abs(correlation), na.rm = TRUE)) 

ccf %>%
  DT::datatable(filter = "none",
                caption = 'Results of cross-correlation of misc. variables with well level. Listed per variable is the highest-correlated lag time.',
                rownames = FALSE,
                fillContainer = T,
                escape = FALSE,
                options = list(dom = 't',
                               pageLength = nrow(.),
                               scrollY = '300px'))

Water Use and Visitor Use – Is there a real relationship?

vis <- all_brca_data %>% 
         group_by(ym, season) %>% 
         summarize(rec_visitation = mean(rec_visitation, na.rm = T),
                   use_acre_feet = sum(as.numeric(use_acre_feet), na.rm = T))

ggplot(data = vis) +
  geom_point(aes(x = rec_visitation, y = use_acre_feet, color = season)) +
  xlab("BRCA Visitation") +
  ylab("Water Use (acre-feet)") +
  theme_bw() +
  scale_color_manual(values = c("#56B4E9", "#F0E442","#D55E00", "maroon")) +
  # Add linear trend line and R-squared
  geom_smooth(aes(x = rec_visitation, y = use_acre_feet), color = "black", method = "lm", se = FALSE) +
  # Add R-squared to plot
  annotate("text", 
           x = max(vis$rec_visitation) - 57000, 
           y = min(vis$use_acre_feet) + 1,
           label = paste("R² =", round(summary(lm(use_acre_feet ~ rec_visitation, data = vis))$r.squared, 2)),
           size = 5)

  visitors <- (ts(vis$rec_visitation, start = 1))
  
  water_use <- (ts(vis$use_acre_feet, start = 1))
  
  result <- ccf(visitors, water_use, na.action = na.pass, type = "correlation", plot = FALSE)
  
  plot(result, main = paste0("Correlation of vistation and water use"), xlab = "Lag", ylab = paste0("Correlation"))

Groundwater flow and use

We need to estimate groundwater flow. How can we do this? We have the following characteristic of the aquifer, can we use these to estimate that when we couple them with water level data?

KW: length/K? depth?

- Hydraulic conductivity ratio - 1:10 (ratio of vertical to horizontal hydraulic conductivity)

- Transmissivity (T) = 8900 ft^2/day

- Specific Yield (Sy) = 0.37

- Kr = 150 feet/day

- Aquifer thickness = 60 feet

- Hydraulic gradient between well 1 and 2 = 0.0057 ft/ft

- Hydraulic gradient upstream of well field = 0.0042 ft/ft

- Average daily use = 112,000 gallons/day (or, 15,000 cubic feet/day) … during summer pumpage… so greatly overestimates winter use.

Once we’ve gotten an estimate of groundwater flow - we can compare this to precip (Q_gw/P?). We can also compare it to use (Use/Q_gw?).